!pip install tpot
!pip install scikit-learn-intelex
import pandas as pd
import numpy as np
import math
import sys
import sklearn
import tpot
import plotly
import xgboost
import plotly.express as px
import plotly.graph_objects as go
from tpot import TPOTClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import validation_curve
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.tree import plot_tree
from sklearn.manifold import TSNE
from sklearn.metrics import make_scorer
from sklearn import preprocessing
from sklearn import tree
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import naive_bayes, neighbors, neural_network, svm  # submodules referenced later via the sklearn namespace
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearnex import patch_sklearn
patch_sklearn()
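# Load the heart disease dataset (binary target: 1 = heart disease, 0 = none) and inspect class balance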
data = pd.read_csv("./heart.csv")
data_np = data.to_numpy()
data['target'].value_counts()
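# Restrict to a subset of predictive features plus the target column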
filtered_data = data[['age', 'sex', 'cp', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']]
filtered_data
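# 80/20 train/test split on the raw, unscaled features (used by most models below)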
train, test = train_test_split(filtered_data, test_size=0.2)
train_X = train.drop('target', axis=1)
train_y = train['target']
test_X = test.drop('target', axis=1)
test_y = test['target']
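# Standard-scaled variant for scale-sensitive models (MLP, SVM). Note two caveats: the scaler is fit
# on the full dataset before splitting, and this is a second, independent random split, so the scaled
# and unscaled experiments do not share the same train/test rows.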
scaler = preprocessing.StandardScaler().fit(filtered_data.drop('target', axis=1))
normalized_train, normalized_test = train_test_split(filtered_data, test_size=0.2)
normalized_train_X = normalized_train.drop('target', axis=1)
normalized_train_X = scaler.transform(normalized_train_X)
normalized_train_y = normalized_train['target']
normalized_test_X = normalized_test.drop('target', axis=1)
normalized_test_X = scaler.transform(normalized_test_X)
normalized_test_y = normalized_test['target']
normalized_train_X, normalized_train_y
def fitness(y_true, y_pred):
    # Composite fitness used by the GA; also logs each CV evaluation to global lists for plotting.
    yt = y_true.to_numpy() if hasattr(y_true, 'to_numpy') else np.asarray(y_true)
    accuracy = float(np.sum(y_pred == yt)) / len(yt)
    truePositive = sum(1 for i in range(len(y_pred)) if y_pred[i] == 1 and yt[i] == 1)
    trueNegative = sum(1 for i in range(len(y_pred)) if y_pred[i] == 0 and yt[i] == 0)
    falsePositive = sum(1 for i in range(len(y_pred)) if y_pred[i] == 1 and yt[i] == 0)
    falseNegative = sum(1 for i in range(len(y_pred)) if y_pred[i] == 0 and yt[i] == 1)
    # The small epsilon guards against division by zero when a class is never predicted.
    precision = truePositive / (truePositive + falsePositive + 1e-4)
    recall = truePositive / (truePositive + falseNegative + 1e-4)
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    return accuracy + precision + recall
scorer = make_scorer(fitness, greater_is_better=True)
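# Logistic regression search. fitness() appends to these global lists on every CV evaluation,
# so they are reset before each model's run.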
accuracies = []
precisions = []
recalls = []
parameters = {'tol': list(np.random.uniform(1e-6, 100.0, 10)),
'C': list(np.random.uniform(1e-6, 100.0, 10)),
'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
tpot_classifier = TPOTClassifier(template='Classifier',
generations=50, population_size=50, offspring_size=12,
verbosity=0, early_stop=12,
config_dict={'sklearn.linear_model.LogisticRegression': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(train_X, train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_logistic = tpot_classifier.fitted_pipeline_.steps[0][1]
print(model_logistic)
print('Model with optimized hyperparameters: ', fitness(test_y, model_logistic.predict(test_X)))
print('Test Accuracy: ', model_logistic.score(test_X, test_y))
print('Train Accuracy: ', model_logistic.score(train_X, train_y))
clf = LogisticRegression(random_state=0).fit(train_X, train_y)
print('Model without optimized hyperparameters: ', fitness(test_y, clf.predict(test_X)))
print('Test Accuracy: ', clf.score(test_X, test_y))
print('Train Accuracy: ', clf.score(train_X, train_y))
print(classification_report(test_y, model_logistic.predict(test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_logistic,
test_X,
test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
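# Project the training features to 2-D with t-SNE and refit the tuned model on the embedding
# to visualize its decision regions (the models are refit on the original features later).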
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_logistic.fit(X_embedded, train_y)
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_logistic, legend=2, ax=ax)
fig.suptitle('Logistic Regression on heart disease')
plt.show()
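# Decision tree: repeat the search with tree-specific hyperparameters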
accuracies = []
precisions = []
recalls = []
parameters = {'criterion': ['gini', 'entropy'],
'splitter': ['best', 'random'],
'min_impurity_decrease': list(np.linspace(0, 0.1, 20)),
'ccp_alpha': list(np.linspace(0, 0.1, 20))}
tpot_classifier = TPOTClassifier(template='Classifier',
generations=50, population_size=50, offspring_size=30,
verbosity=0, early_stop=50,
config_dict={'sklearn.tree.DecisionTreeClassifier': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(train_X, train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_dt = tpot_classifier.fitted_pipeline_.steps[0][1]
print(model_dt)
print('Model with optimized hyperparameters: ', fitness(test_y, model_dt.predict(test_X)))
print('Test Accuracy: ', model_dt.score(test_X, test_y))
print('Train Accuracy: ', model_dt.score(train_X, train_y))
clf = tree.DecisionTreeClassifier(random_state=0).fit(train_X, train_y)
print('Model without optimized hyperparameters: ', fitness(test_y, clf.predict(test_X)))
print('Test Accuracy: ', clf.score(test_X, test_y))
print('Train Accuracy: ', clf.score(train_X, train_y))
print(classification_report(test_y, model_dt.predict(test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_dt,
test_X,
test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
model_dt.feature_names_in_
plt.figure(figsize=(20,20))
plot_tree(model_dt, filled=True, feature_names=model_dt.feature_names_in_, class_names=['No disease', 'Heart disease'])
plt.title("Decision tree")
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_dt.fit(X_embedded, train_y)
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_dt, legend=2, ax=ax)
fig.suptitle('Decision Tree on heart disease')
plt.show()
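# Random forest: shallow trees with the number of estimators added to the search space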
accuracies = []
precisions = []
recalls = []
parameters = {'criterion': ['gini', 'entropy'],
'max_depth': [2, 3],
'min_impurity_decrease': list(np.linspace(0, 0.1, 20)),
'n_estimators': list(np.linspace(50, 150, 20, dtype=int)),
'ccp_alpha': list(np.linspace(0, 0.1, 20)),
'max_features': ['sqrt', 'log2']}  # 'auto' was deprecated and later removed in scikit-learn
tpot_classifier = TPOTClassifier(template='Classifier',
generations=20, population_size=20, offspring_size=10,
verbosity=2, early_stop=20,
config_dict={'sklearn.ensemble.RandomForestClassifier': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(train_X, train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_rf = tpot_classifier.fitted_pipeline_.steps[0][1]
print('Model with optimized hyperparameters: ', fitness(test_y, model_rf.predict(test_X)))
print('Test Accuracy: ', model_rf.score(test_X, test_y))
print('Train Accuracy: ', model_rf.score(train_X, train_y))
clf = sklearn.ensemble.RandomForestClassifier(random_state=0).fit(train_X, train_y)
print('Model without optimized hyperparameters: ', fitness(test_y, clf.predict(test_X)))
print('Test Accuracy: ', clf.score(test_X, test_y))
print('Train Accuracy: ', clf.score(train_X, train_y))
print(classification_report(test_y, model_rf.predict(test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_rf,
test_X,
test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_rf.fit(X_embedded, train_y)
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_rf, legend=2, ax=ax)
fig.suptitle('Random Forest on heart disease')
plt.show()
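# XGBoost: gradient-boosted trees with sampling and regularization hyperparameters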
accuracies = []
precisions = []
recalls = []
parameters = {'learning_rate': list(np.linspace(0.1, 2, 20)),
              'use_label_encoder': [False],
              'validate_parameters': [False],
              'disable_default_eval_metric': [True],
              'subsample': list(np.linspace(0.05, 1, 20)),         # must lie in (0, 1]
              'colsample_bynode': list(np.linspace(0.05, 1, 20)),  # likewise strictly positive
              'reg_lambda': list(np.linspace(0, 0.1, 20))}
tpot_classifier = TPOTClassifier(template='Classifier',
generations=20, population_size=20, offspring_size=10,
verbosity=2, early_stop=5,
config_dict={'xgboost.XGBClassifier': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(train_X, train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_xg = tpot_classifier.fitted_pipeline_.steps[0][1]
print(model_xg)
print('Model with optimized hyperparameters: ', fitness(test_y, model_xg.predict(test_X)))
print('Test Accuracy: ', model_xg.score(test_X, test_y))
print('Train Accuracy: ', model_xg.score(train_X, train_y))
clf = xgboost.XGBClassifier().fit(train_X, train_y)
print('Model without optimized hyperparameters: ', fitness(test_y, clf.predict(test_X)))
print('Test Accuracy: ', clf.score(test_X, test_y))
print('Train Accuracy: ', clf.score(train_X, train_y))
print(classification_report(test_y, model_xg.predict(test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_xg,
test_X,
test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_xg.fit(X_embedded, train_y)
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_xg, legend=2, ax=ax)
fig.suptitle('XGBoost on heart disease')
plt.show()
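# Complement naive Bayes: only two hyperparameters to tune; features must be non-negative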
accuracies = []
precisions = []
recalls = []
parameters = {'alpha': list(np.linspace(0.1, 2, 20)),
'norm': [False, True],}
tpot_classifier = TPOTClassifier(template='Classifier',
generations=20, population_size=100, offspring_size=20,
verbosity=2, early_stop=100,
config_dict={'sklearn.naive_bayes.ComplementNB': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(train_X, train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_nb = tpot_classifier.fitted_pipeline_.steps[0][1]
print(model_nb)
print('Model with optimized hyperparameters: ', fitness(test_y, model_nb.predict(test_X)))
print('Test Accuracy: ', model_nb.score(test_X, test_y))
print('Train Accuracy: ', model_nb.score(train_X, train_y))
clf = sklearn.naive_bayes.ComplementNB().fit(train_X, train_y)
print('Model without optimized hyperparameters: ', fitness(test_y, clf.predict(test_X)))
print('Test Accuracy: ', clf.score(test_X, test_y))
print('Train Accuracy: ', clf.score(train_X, train_y))
print(classification_report(test_y, model_nb.predict(test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_nb,
test_X,
test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
X_shifted = X_embedded - X_embedded.min()  # ComplementNB requires non-negative features
model_nb.fit(X_shifted, train_y)
fig, ax = plt.subplots()
plot_decision_regions(X_shifted, train_y.to_numpy(), clf=model_nb, legend=2, ax=ax)
fig.suptitle('Naive Bayes on heart disease')
plt.show()
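# K-nearest neighbors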
accuracies = []
precisions = []
recalls = []
parameters = {'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9],
'weights': ['uniform', 'distance'],
'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}
tpot_classifier = TPOTClassifier(template='Classifier',
generations=20, population_size=100, offspring_size=20,
verbosity=2, early_stop=100,
config_dict={'sklearn.neighbors.KNeighborsClassifier': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(train_X, train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_knn = tpot_classifier.fitted_pipeline_.steps[0][1]
print(model_knn)
print('Model with optimized hyperparameters: ', fitness(test_y, model_knn.predict(test_X)))
print('Test Accuracy: ', model_knn.score(test_X, test_y))
print('Train Accuracy: ', model_knn.score(train_X, train_y))
clf = sklearn.neighbors.KNeighborsClassifier().fit(train_X, train_y)
print('Model without optimized hyperparameters: ', fitness(test_y, clf.predict(test_X)))
print('Test Accuracy: ', clf.score(test_X, test_y))
print('Train Accuracy: ', clf.score(train_X, train_y))
print(classification_report(test_y, model_knn.predict(test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_knn,
test_X,
test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_knn.fit(X_embedded, train_y)  # KNN handles negative values, so no shift is needed
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_knn, legend=2, ax=ax)
fig.suptitle('K-nearest neighbors on heart disease')
plt.show()
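# Multi-layer perceptron: trained on the standard-scaled split, which MLPs need to converge well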
accuracies = []
precisions = []
recalls = []
parameters = {'hidden_layer_sizes': [(100,), (64,), (32,), (128,), (256,)],
              'activation': ['identity', 'logistic', 'tanh', 'relu'],
              'solver': ['lbfgs', 'sgd', 'adam'],
              'learning_rate': ['constant', 'invscaling', 'adaptive'],
              'alpha': list(np.linspace(0, 0.1, 30)),
              'learning_rate_init': list(np.linspace(1e-4, 0.1, 30)),  # must be strictly positive
              'power_t': list(np.linspace(0, 1, 30)),
              'momentum': list(np.linspace(0.5, 1, 30)),
              'nesterovs_momentum': [True, False]
              }
tpot_classifier = TPOTClassifier(template='Classifier',
generations=10, population_size=50, offspring_size=10,
verbosity=2, early_stop=2,
config_dict={'sklearn.neural_network.MLPClassifier': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(normalized_train_X, normalized_train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_nn = tpot_classifier.fitted_pipeline_.steps[0][1]
model_nn.fit(normalized_train_X, normalized_train_y)
print(model_nn)
print('Model with optimized hyperparameters: ', fitness(normalized_test_y, model_nn.predict(normalized_test_X)))
print('Test Accuracy: ', model_nn.score(normalized_test_X, normalized_test_y))
print('Train Accuracy: ', model_nn.score(normalized_train_X, normalized_train_y))
clf = sklearn.neural_network.MLPClassifier().fit(normalized_train_X, normalized_train_y)
print('Model without optimized hyperparameters: ', fitness(normalized_test_y, clf.predict(normalized_test_X)))
print('Test Accuracy: ', clf.score(normalized_test_X, normalized_test_y))
print('Train Accuracy: ', clf.score(normalized_train_X, normalized_train_y))
print(classification_report(normalized_test_y, model_nn.predict(normalized_test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_nn,
normalized_test_X,
normalized_test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_nn.fit(X_embedded, train_y)  # MLP accepts negative inputs, so no shift is needed
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_nn, legend=2, ax=ax)
fig.suptitle('NN on heart disease')
plt.show()
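# Support vector machine: also trained on the standard-scaled split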
accuracies = []
precisions = []
recalls = []
parameters = {
    'C': list(np.linspace(0.05, 2, 30)),  # C must be strictly positive
    'coef0': list(np.linspace(0, 1, 30)),
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],  # 'precomputed' would require a kernel matrix, not raw features
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'],
}
tpot_classifier = TPOTClassifier(template='Classifier',
generations=10, population_size=50, offspring_size=50,
verbosity=2, early_stop=10,
config_dict={'sklearn.svm.SVC': parameters},
cv=5, scoring=scorer)
tpot_classifier.fit(normalized_train_X, normalized_train_y)
fig = go.Figure()
fig.add_trace(go.Scatter(x=np.array(range(len(accuracies))), y=np.array(accuracies),
mode='lines',
name='accuracy'))
fig.add_trace(go.Scatter(x=np.array(range(len(precisions))), y=np.array(precisions),
mode='lines',
name='precision'))
fig.add_trace(go.Scatter(x=np.array(range(len(recalls))), y=np.array(recalls),
mode='lines', name='recall'))
fig.show(renderer="notebook")
model_svm = tpot_classifier.fitted_pipeline_.steps[0][1]
print(model_svm)
print('Model with optimized hyperparameters: ', fitness(normalized_test_y, model_svm.predict(normalized_test_X)))
print('Test Accuracy: ', model_svm.score(normalized_test_X, normalized_test_y))
print('Train Accuracy: ', model_svm.score(normalized_train_X, normalized_train_y))
clf = sklearn.svm.SVC().fit(normalized_train_X, normalized_train_y)
print('Model without optimized hyperparameters: ', fitness(normalized_test_y, clf.predict(normalized_test_X)))
print('Test Accuracy: ', clf.score(normalized_test_X, normalized_test_y))
print('Train Accuracy: ', clf.score(normalized_train_X, normalized_train_y))
print(classification_report(normalized_test_y, model_svm.predict(normalized_test_X)))
fig, ax = plt.subplots(figsize=(7, 7))
cm_display = ConfusionMatrixDisplay.from_estimator(
model_svm,
normalized_test_X,
normalized_test_y,
display_labels=['No heart disease', 'Heart disease'],
cmap=plt.cm.Blues,
ax=ax
)
plt.show()
X_embedded = TSNE(n_components=2, learning_rate='auto', init='random').fit_transform(train_X)
model_svm.fit(X_embedded, train_y)  # refit the tuned SVM directly on the embedding
fig, ax = plt.subplots()
plot_decision_regions(X_embedded, train_y.to_numpy(), clf=model_svm, legend=2, ax=ax)
fig.suptitle('SVM on heart disease')
plt.show()
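# Final comparison: refit every tuned model on its training split and collect test accuracies,
# then add a stacking ensemble. Note the MLP and SVM base learners receive unscaled features
# inside the stack, unlike in their standalone evaluations.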
accuracies = [model_logistic.fit(train_X, train_y).score(test_X, test_y), model_dt.fit(train_X, train_y).score(test_X, test_y),
model_rf.fit(train_X, train_y).score(test_X, test_y), model_xg.fit(train_X, train_y).score(test_X, test_y),
model_nb.fit(train_X, train_y).score(test_X, test_y), model_knn.fit(train_X, train_y).score(test_X, test_y),
model_nn.fit(normalized_train_X, normalized_train_y).score(normalized_test_X, normalized_test_y),
model_svm.fit(normalized_train_X, normalized_train_y).score(normalized_test_X, normalized_test_y)]
estimators = [('logistic', model_logistic), ('dt', model_dt),
('rf', model_rf), ('xg', model_xg),
('nb', model_nb), ('knn', model_knn),
('nn', model_nn), ('svm', model_svm)]
stackingClassifier = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
stackingAccuracy = stackingClassifier.fit(train_X, train_y).score(test_X, test_y)
accuracies.append(stackingAccuracy)
stackingAccuracy
classifiers = ['Logistic Regression', 'Decision Tree', 'Random Forest',
'XGBoost', 'Naive Bayes', 'K-nn', 'Neural Networks', 'SVM', 'Stacking Classifier']
fig = go.Figure()
fig.add_trace(go.Bar(x=classifiers, y=np.array(accuracies)))
fig.update_layout(title="Accuracies barplot")
fig.show(renderer="notebook")
The genetic algorithm manages to find well-adjusted hyperparameters with which the algorithms produce good results most of the time. However, since the dataset contains only about 300 entries, 20% of which are held out for the test set, there is not much room to improve the results.
Since the GA performs cross-validation, the effective training set is even smaller during the hyperparameter search. That is why different runs yield different results, sometimes worse than those of an algorithm with default parameters. Additionally, the fitness function defined for the GA is the sum of accuracy, precision and recall, so a lower accuracy can be offset by a higher recall or precision: for instance, a hypothetical candidate with accuracy 0.80, precision 0.75 and recall 0.95 (fitness 2.50) would outrank one with accuracy 0.85, precision 0.80 and recall 0.78 (fitness 2.43).
On a previous run, Logistic Regression and Random Forest were the best-performing algorithms on this dataset. However, as stated above, these results vary between runs, given the stochastic nature of the train/test split and of the hyperparameter values chosen by the GA.